## Factiva Immigration Topic Prevalence (UKRAINE) ##
# The code below reads the Day, ImmigrationProp and AllArticles columns of
# factivatopic_ukraine.
load("factivatopic_ukraine.RData")

factivatopic_ukraine$Day <- as.numeric(factivatopic_ukraine$Day)

# Half-width of the 95% normal-approximation interval for a proportion:
# 1.96 * sqrt(p * (1 - p) / n), with p = ImmigrationProp and n = AllArticles.
# Computed once and reused for both bounds.
prevalence_margin <- with(
  factivatopic_ukraine,
  1.96 * sqrt((ImmigrationProp * (1 - ImmigrationProp)) / AllArticles)
)
factivatopic_ukraine$up <- factivatopic_ukraine$ImmigrationProp + prevalence_margin
factivatopic_ukraine$low <- factivatopic_ukraine$ImmigrationProp - prevalence_margin
rm(prevalence_margin)


# Solid line: prevalence per day; dashed lines: upper/lower interval bounds;
# dashed vertical line marks day 0.
factivaplot_ukr <- ggplot(
  factivatopic_ukraine,
  aes(x = Day, y = ImmigrationProp, group = 1)
) +
  geom_path(size = 2, color = "slateblue4") +
  geom_path(aes(y = up), size = 1, linetype = "dashed") +
  geom_path(aes(y = low), size = 1, linetype = "dashed") +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = -7:7) +
  labs(x = "Days", y = "Prevalence", title = "Prevalence of Immigration Topic") +
  theme_light(base_size = 25)
factivaplot_ukr



## Factiva Ukraine dictionary based sentiment analysis
# Inspect the Lexicoder sentiment dictionary before using it.
length(data_dictionary_LSD2015)
data_dictionary_LSD2015

load("ukr_df.RData")

# Rows without cleaned text cannot be scored, so drop them first.
ukr_df <- ukr_df %>%
  drop_na(clean_text)


# Positional document key used to merge the results back onto ukr_df later.
# Derived from the actual row count so it stays valid if the number of rows
# surviving drop_na() changes.
ukr_df$docnum <- seq_len(nrow(ukr_df))

ukr_dfcorp <- corpus(ukr_df, text_field = "clean_text")

# Tokenise without punctuation and numbers, then drop English stopwords and a
# few extra filler words.
ukr_dftoks <- tokens(ukr_dfcorp,
                     remove_punct = TRUE,
                     remove_numbers = TRUE) %>%
  tokens_remove(stopwords("en")) %>%
  tokens_remove(c("can", "now", "like", "just", "us", "yes", "something", "must"))


ukr_df_dfm <- dfm(ukr_dftoks)
ukr_df_dfm


# Counts of matches for the first two dictionary keys per document.
ukr_df_sentiment <- dfm_group(ukr_df_dfm) %>%
  dfm_lookup(dictionary = data_dictionary_LSD2015[1:2])
ukr_df_sentiment

# as a percentage
# Same lookup, but on within-document proportions rather than raw counts.
ukr_df_sentiment_prop <- dfm_group(ukr_df_dfm) %>%
  dfm_weight(scheme = "prop") %>%
  dfm_lookup(dictionary = data_dictionary_LSD2015[1:2])
ukr_df_sentiment_prop


ukr_dfsent <- convert(ukr_df_sentiment_prop, to = "data.frame")
# NOTE(review): docnum is assigned by position, which assumes dfm_group() kept
# the documents in the same order as ukr_df - confirm.
ukr_dfsent$docnum <- seq_len(nrow(ukr_dfsent))

## immigration dictionary (based on Ruedin & Morales 2017)

# creating the dictionary
# A single "immigration" key; entries containing * are wildcard patterns.
immidict <- dictionary(list(immigration = c("asylum", "border",
                                            "citizen*", "cultur*",
                                            "deport*", "ethnic*",
                                            "foreign*", "halal",
                                            "hallal", "identity",
                                            "immigr*", "integrat*",
                                            "irregular", "migrant*",
                                            "*migration*", "minorit*",
                                            "multicultur*", "naturalis*",
                                            "naturaliz*", "permit",
                                            "refug*", "religious", "reunion",
                                            "temple", "unauthorised", "unauthorized",
                                            "unity", "evac*", "flee*",
                                            "airport", "checkpoint", "escap*")))


# searching the tokens for matches with the dictionary
ukr_dftoks_dict <- tokens_lookup(ukr_dftoks, dictionary = immidict)
print(ukr_dftoks_dict)

ukr_dfdfm_dict <- dfm(ukr_dftoks_dict)
ukr_dfdfm_dict

# Reuse the dfm built above instead of constructing it a second time, and key
# the rows by position using the actual row count rather than a fixed 1:510.
ukr_dfdictdf <- convert(ukr_dfdfm_dict, to = "data.frame")
ukr_dfdictdf$docnum <- seq_len(nrow(ukr_dfdictdf))


# merging the original dataframe with the dictionary matches and the sentiments
# No `by` is given, so dplyr joins on every column name the two tables share.
ukr <- full_join(ukr_df, ukr_dfdictdf)
ukr_sent <- full_join(ukr, ukr_dfsent)

# Net sentiment per document: positive share minus negative share.
ukr_sent$sentiment <- ukr_sent$positive - ukr_sent$negative

## sentiment by day
# Mean Lexicoder sentiment per value of `invasion`, plus the ribbon bounds
# used to shade the area between the line and zero.
ukr_sent_2 <- ukr_sent %>%
  group_by(invasion) %>%
  summarize(mean = mean(sentiment, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    upper_green = pmax(mean, 0),
    lower_green = 0,
    upper_red = 0,
    lower_red = pmin(mean, 0)
  )

# plots
# Green ribbon spans 0 up to positive means, red ribbon spans negative means
# up to 0; the dashed vertical line marks day 0.
ukr_lexicoder <- ggplot(ukr_sent_2, aes(x = invasion, y = mean)) +
  geom_line(size = 2, color = "slateblue4") +
  geom_ribbon(aes(ymin = lower_green, ymax = upper_green), fill = "green", alpha = 0.5) +
  geom_ribbon(aes(ymin = lower_red, ymax = upper_red), fill = "red", alpha = 0.5) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  scale_x_continuous(breaks = -7:7) +
  labs(x = "Days", y = "Sentiment", title = "Sentiment Lexicoder") +
  theme_light(base_size = 25)
ukr_lexicoder



### Factiva Ukraine sentiment analysis with VADER ###
# Cap each text at 9,999 characters before scoring.
ukr_sent$text_trimmed <- substr(ukr_sent$clean_text, 1, 9999)

set.seed(2023)
ukr_sent_v <- as.data.frame(vader_df(ukr_sent$text_trimmed))

# Positional key for the join below, taken from the actual number of scored
# rows instead of a fixed 1:510 so it cannot fall out of step with the data.
ukr_sent_v$docnum <- seq_len(nrow(ukr_sent_v))
ukr_sent_final <- full_join(ukr_sent, ukr_sent_v, by = "docnum")
save(ukr_sent_final, file = "ukr_sent_final.RData")
# Reloads the object that was just saved.
# NOTE(review): presumably kept so the script can be resumed from here
# without re-running VADER - confirm.
load("ukr_sent_final.RData")

## sentiment by day
# Mean VADER compound score per value of `invasion`.
ukr_sent_final_2 <- ukr_sent_final %>%
  group_by(invasion) %>%
  summarize(mean = mean(compound, na.rm = TRUE)) %>%
  ungroup()

# plots
# Ribbon bounds: green between 0 and positive means, red between negative
# means and 0.
ukr_sent_final_2$upper_green <- pmax(ukr_sent_final_2$mean, 0)
ukr_sent_final_2$lower_green <- 0
ukr_sent_final_2$upper_red <- 0
ukr_sent_final_2$lower_red <- pmin(ukr_sent_final_2$mean, 0)

ukr_vader <- ggplot(ukr_sent_final_2, aes(x = invasion, y = mean)) +
  geom_line(size = 2, color = "slateblue4") +
  geom_ribbon(aes(ymin = lower_green, ymax = upper_green), fill = "green", alpha = 0.5) +
  geom_ribbon(aes(ymin = lower_red, ymax = upper_red), fill = "red", alpha = 0.5) +
  xlab("Days") +
  ylab("Sentiment") +
  scale_x_continuous(breaks = c(-7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7)) +
  geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
  theme_light(base_size = 25) +
  ggtitle("Sentiment VADER")
ukr_vader




## factiva lexicoder
# Treatment indicator: 1 when invasion >= 0, 0 when invasion < 0 (NA stays
# NA). Built in one vectorised step; the original assigned it piecemeal, and
# repeated the identical assignment before the second model.
ukr_sent_final$treat <- as.numeric(ukr_sent_final$invasion >= 0)
table(ukr_sent_final$treat)

# Difference in mean Lexicoder sentiment between the two periods.
ukr_faclex <- lm(sentiment ~ treat, data = ukr_sent_final)
ukr_faclex <- summary(ukr_faclex)

# Point estimate and estimate +/- 1.96 standard errors for `treat`. The row
# and columns are selected by name rather than by position.
ukr_faclexC <- ukr_faclex$coefficients["treat", "Estimate"]
ukr_faclexU <- ukr_faclexC + ukr_faclex$coefficients["treat", "Std. Error"] * 1.96
ukr_faclexL <- ukr_faclexC - ukr_faclex$coefficients["treat", "Std. Error"] * 1.96


## factiva vader
# Same model with the VADER compound score as the outcome; `treat` is reused
# from above.
ukr_facvad <- lm(compound ~ treat, data = ukr_sent_final)
ukr_facvad <- summary(ukr_facvad)

ukr_facvadC <- ukr_facvad$coefficients["treat", "Estimate"]
ukr_facvadU <- ukr_facvadC + ukr_facvad$coefficients["treat", "Std. Error"] * 1.96
ukr_facvadL <- ukr_facvadC - ukr_facvad$coefficients["treat", "Std. Error"] * 1.96


# Builds one daily-sentiment panel: the daily mean line, a point and error bar
# at x = 0 for the regression estimate and its interval, the green/red ribbons
# around zero, and a dashed vertical line at day 0.
#   daily_means: data with invasion, mean and the four ribbon-bound columns
#   estimate, ci_low, ci_high: scalars drawn at x = 0
#   title: plot title
sentiment_effect_plot <- function(daily_means, estimate, ci_low, ci_high, title) {
  # Evaluate the scalars now so the plot does not depend on later changes to
  # the variables they were passed from.
  force(estimate)
  force(ci_low)
  force(ci_high)

  ggplot(daily_means, aes(x = invasion, y = mean)) +
    geom_line(size = 2, color = "slateblue4") +
    geom_point(aes(x = 0, y = estimate), size = 8) +
    geom_errorbar(aes(x = 0, ymin = ci_low, ymax = ci_high), size = 3) +
    geom_ribbon(aes(ymin = lower_green, ymax = upper_green), fill = "green", alpha = 0.5) +
    geom_ribbon(aes(ymin = lower_red, ymax = upper_red), fill = "red", alpha = 0.5) +
    geom_vline(xintercept = 0, color = "black", linetype = "dashed", size = 1) +
    scale_x_continuous(breaks = -7:7) +
    labs(x = "Days", y = "Sentiment", title = title) +
    theme_light(base_size = 25)
}

lexukrplot <- sentiment_effect_plot(
  ukr_sent_2, ukr_faclexC, ukr_faclexL, ukr_faclexU, "Sentiment Lexicoder"
)
lexukrplot


vadukrplot <- sentiment_effect_plot(
  ukr_sent_final_2, ukr_facvadC, ukr_facvadL, ukr_facvadU, "Sentiment VADER"
)
vadukrplot


# Caption placed in the fourth cell of the panel grid.
text_corner <- text_grob(
  "Factiva data for 1 week \nbefore and after the Russian \ninvasion of Ukraine.",
  size = 30, face = "bold", color = "black", x = .4
)


ggarrange(factivaplot_ukr, lexukrplot, vadukrplot, text_corner)
